<?php
// +----------------------------------------------------------------------
// | 行政区划
// +----------------------------------------------------------------------

namespace helper\util;

/**
 * Scraper for the administrative division code pages served under
 * stats.gov.cn (province -> city -> district -> street -> village).
 *
 * Every row returned by this class is an array with the keys
 * `parent_adcode`, `adcode`, `name` and `level`.
 */
class Region
{
    /**
     * Per-level parsing rules: [number of leading characters kept as the
     * adcode, regular expression producing alternating code/name tokens].
     */
    private const LEVEL_RULES = [
        'province' => [2, '/\d{2}|[\x7f-\xff]+/'],
        'city' => [4, '/\d{4}$|[^>]+(?=<\/a>)/'],
        'district' => [6, '/\d{6}$|[^>]+(?=<\/a>)/'],
        // Some names contain punctuation such as "." or "()", so the link
        // text is captured instead of matching multi-byte characters only.
        'street' => [9, '/\d{9}$|[^>]+(?=<\/a>)/'],
        'village' => [12, '/\d{12}|[\x7f-\xff]+/'],
    ];

    /** Base URL; the constructor appends the requested year to it. */
    private string $url = 'http://www.stats.gov.cn/sj/tjbz/tjyqhdmhcxhfdm/';

    /**
     * @param int|string $year Year segment appended to the base URL.
     */
    public function __construct($year = 2022)
    {
        $this->url .= $year;
    }

    /**
     * Collect rows from the province level down to the requested depth.
     *
     * Depth values: 1 = provinces only, 2 = plus cities, 3 = plus districts,
     * 4 = plus streets/towns, 5 or more = plus villages/communities. Values
     * below 1 behave like 1. Each additional level issues one HTTP request
     * per parent row, so deep crawls are expensive.
     *
     * The result lists the most recently fetched page first and the
     * provinces last; rows inside one page keep their page order.
     *
     * @param int $level Deepest level to fetch (see above).
     * @return array List of rows.
     */
    public function all($level = 3)
    {
        $depth = (int) $level;

        $provinces = $this->province();
        // Pages are stored in fetch order and flattened once at the end, so
        // the accumulated result is not re-copied on every request.
        $pages = [$provinces];

        if ($depth >= 2) {
            foreach ($provinces as $province) {
                $cities = $this->city($province['adcode']);
                $pages[] = $cities;
                if ($depth < 3) {
                    continue;
                }
                foreach ($cities as $city) {
                    $districts = $this->district($city['adcode']);
                    $pages[] = $districts;
                    if ($depth < 4) {
                        continue;
                    }
                    foreach ($districts as $district) {
                        $streets = $this->street($district['adcode']);
                        $pages[] = $streets;
                        if ($depth < 5) {
                            continue;
                        }
                        foreach ($streets as $street) {
                            $pages[] = $this->village($street['adcode']);
                        }
                    }
                }
            }
        }

        // Walk the pages backwards: newest page first, provinces last.
        $data = [];
        for ($i = count($pages) - 1; $i >= 0; $i--) {
            foreach ($pages[$i] as $row) {
                $data[] = $row;
            }
        }
        return $data;
    }

    /**
     * Fetch all provinces from `{base}/index.html`.
     *
     * @return array Rows with level `province` and parent_adcode `'0'`.
     */
    public function province(): array
    {
        return $this->curl_request($this->url . '/index.html', '0', 'provincetr', 'province');
    }

    /**
     * Fetch the cities of one province from `{base}/{province_code}.html`.
     *
     * @param string $province_code Province code.
     * @return array Rows with level `city`.
     */
    public function city(string $province_code): array
    {
        return $this->curl_request($this->url . "/$province_code.html", $province_code, 'citytr', 'city');
    }

    /**
     * Fetch the districts/counties of one city.
     *
     * The page lives in a directory named after the first two characters of
     * the city code.
     *
     * @param string $city_code City code.
     * @return array Rows with level `district`.
     */
    public function district(string $city_code): array
    {
        $province_code = substr($city_code, 0, 2);
        return $this->curl_request($this->url . "/" . $province_code . "/{$city_code}.html", $city_code, 'countytr', 'district');
    }

    /**
     * Fetch the streets/towns of one district.
     *
     * The directory path is built from characters 1-2 and 3-4 of the
     * district code.
     *
     * @param string $district_code District code.
     * @return array Rows with level `street`.
     */
    public function street(string $district_code): array
    {
        $province_code = substr($district_code, 0, 2);
        $city_code = substr($district_code, 2, 2);
        return $this->curl_request($this->url . "/" . $province_code . "/$city_code" . "/{$district_code}.html", $district_code, 'towntr', 'street');
    }

    /**
     * Fetch the villages/communities of one street.
     *
     * The directory path is built from characters 1-2, 3-4 and 5-6 of the
     * street code.
     *
     * @param string $street_code Street code.
     * @return array Rows with level `village`.
     */
    public function village(string $street_code): array
    {
        $province_code = substr($street_code, 0, 2);
        $city_code = substr($street_code, 2, 2);
        $district_code = substr($street_code, 4, 2);
        return $this->curl_request($this->url . "/$province_code/$city_code/$district_code/$street_code.html", $street_code, 'villagetr', 'village');
    }

    /**
     * Download one page and turn its table rows into region rows.
     *
     * Returns an empty list when the level is unknown, the request fails,
     * the row marker is missing, or no closing table tag follows it.
     *
     * @param string $url   Page to download.
     * @param string $code  Code of the parent region, copied into each row.
     * @param string $str   Marker text of the first row (e.g. `citytr`).
     * @param string $level One of province, city, district, street, village.
     * @return array List of rows.
     */
    private function curl_request(string $url, string $code, string $str, string $level): array
    {
        if (!isset(self::LEVEL_RULES[$level])) {
            return [];
        }

        $html = $this->fetch_html($url);
        if ($html === '') {
            return [];
        }

        $table = $this->crop_rows($html, $str);
        if ($table === '') {
            return [];
        }

        return $this->parse_rows($table, $code, $level);
    }

    /**
     * Download a page and convert it from GBK to UTF-8.
     *
     * @param string $url Page to download.
     * @return string UTF-8 text, or '' when the transfer or conversion fails.
     */
    private function fetch_html(string $url): string
    {
        $curl = curl_init();
        if ($curl === false) {
            return '';
        }
        curl_setopt($curl, CURLOPT_URL, $url);
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($curl, CURLOPT_ENCODING, 'gzip');
        $body = curl_exec($curl);
        curl_close($curl);

        // curl_exec() yields false on failure; never feed that to the parser.
        if (!is_string($body) || $body === '') {
            return '';
        }

        $converted = mb_convert_encoding($body, 'UTF-8', 'GBK');
        return is_string($converted) ? $converted : '';
    }

    /**
     * Cut the page down to the text between the first row marker and the
     * closing table tag that follows it.
     *
     * @param string $html   UTF-8 page text.
     * @param string $marker Marker text of the first row.
     * @return string Cropped text, or '' when either boundary is missing.
     */
    private function crop_rows(string $html, string $marker): string
    {
        // The marker is only looked for after the first 2000 characters.
        // NOTE(review): presumably this skips occurrences in the page head
        // (e.g. stylesheet rules) - confirm against a live page.
        $start = $this->find_from($html, $marker, 2000);
        if ($start === false) {
            return '';
        }
        $html = mb_substr($html, $start);

        // The closing tag is searched in either letter case, skipping the
        // first 200 characters.
        $end = $this->find_from($html, '</TABLE>', 200);
        if ($end === false) {
            $end = $this->find_from($html, '</table>', 200);
        }
        if ($end === false) {
            return '';
        }

        return mb_substr($html, 0, $end);
    }

    /**
     * mb_strpos() that reports "not found" instead of failing when the start
     * offset lies beyond the end of the text (PHP 8 throws ValueError there).
     *
     * @param string $haystack Text to search.
     * @param string $needle   Text to look for.
     * @param int    $offset   Character offset to start from.
     * @return int|false Character position, or false when not found.
     */
    private function find_from(string $haystack, string $needle, int $offset)
    {
        if ($offset > mb_strlen($haystack)) {
            return false;
        }
        return mb_strpos($haystack, $needle, $offset);
    }

    /**
     * Convert the cropped table text into region rows.
     *
     * The level's pattern produces tokens that alternate between a code and
     * a name; a trailing token without a partner is dropped.
     *
     * @param string $table Cropped table text.
     * @param string $code  Code of the parent region.
     * @param string $level Key of LEVEL_RULES.
     * @return array List of rows.
     */
    private function parse_rows(string $table, string $code, string $level): array
    {
        [$length, $pattern] = self::LEVEL_RULES[$level];

        if (!preg_match_all($pattern, $table, $found)) {
            return [];
        }
        $tokens = $found[0];
        $total = count($tokens);

        $lists = [];
        for ($i = 0; $i + 1 < $total; $i += 2) {
            $name = $tokens[$i + 1];
            if ($level === 'village') {
                // Strip committee suffixes from village/community names.
                $name = str_replace(['村委会', '居委会', '居民委员会', '村民委员会', '经济开发区社区'], ['村', '', '', '', '经济开发区'], $name);
            }
            $lists[] = [
                'parent_adcode' => $code,
                'adcode' => substr($tokens[$i], 0, $length),
                'name' => $name,
                'level' => $level
            ];
        }
        return $lists;
    }
}